library(tidyverse)
## ── Attaching packages ─────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(leaflet)
library(ggplot2)
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Read in wine data.
#' Extract the first four-digit year (1900-2099) found in each string.
#'
#' @param string Character vector, e.g. wine titles.
#' @return Numeric vector the same length as `string`; `NA` where no year is
#'   found or the element itself is `NA`.
year_extract <- function(string) {
  # "(19|20)" replaces the original class "[1-2][9|0]", which also accepted
  # 10xx and 29xx and a literal "|" in the second position.
  hits <- regexpr("(19|20)[0-9]{2}", string)
  years <- rep(NA_real_, length(string))
  # which() drops both non-matches (-1) and NA inputs.
  found <- which(hits > 0L)
  # regmatches() returns only the matched elements, in input order, so the
  # result lines up one-to-one with `found`.
  years[found] <- as.numeric(regmatches(string, hits))
  years
}
# Load the raw review export, normalise the column names, drop the columns
# that are not used downstream, and derive a vintage year from each title.
wine_tidy_df <- "./wine_data/winemag-data-130k-v2.csv" %>%
  read_csv() %>%
  janitor::clean_names() %>%
  select(-c(region_2, taster_twitter_handle, taster_name, x1)) %>%
  mutate(year = year_extract(title))
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## country = col_character(),
## description = col_character(),
## designation = col_character(),
## points = col_double(),
## price = col_double(),
## province = col_character(),
## region_1 = col_character(),
## region_2 = col_character(),
## taster_name = col_character(),
## taster_twitter_handle = col_character(),
## title = col_character(),
## variety = col_character(),
## winery = col_character()
## )
# Per-state summary of US wines: number of reviewed wines, mean rating
# (rounded to a whole number) and mean price (rounded to 2 decimals).
# Rows missing state, points or price are dropped before summarising.
# The trailing interactive `view` call was removed so that building the
# table has no viewer side effect.
wine_us <- wine_tidy_df %>%
  filter(country == "US") %>%
  rename(state = province) %>%
  # A province of "America" carries no state information; treat it as missing.
  mutate(state = na_if(state, "America")) %>%
  select(state, points, price) %>%
  drop_na() %>%
  group_by(state) %>%
  summarise(
    total = n(),
    avg_rating = mean(points),
    avg_price = mean(price)
  ) %>%
  mutate(
    avg_rating = round(avg_rating, 0),
    avg_price = round(avg_price, 2)
  ) %>%
  arrange(desc(total))
## `summarise()` ungrouping output (override with `.groups` argument)
# Same US subset as the state summary, but keeping incomplete rows so the
# number of missing values per column can be counted.
wine_us_missing <- wine_tidy_df %>%
  filter(country == "US") %>%
  transmute(
    state = na_if(province, "America"),
    points,
    price
  )
# Count of NA values in each column.
map(wine_us_missing, function(column) sum(is.na(column)))
## $state
## [1] 95
##
## $points
## [1] 0
##
## $price
## [1] 239
Missing values: * State: 95 * Points: 0 * Price: 239. Several wines made in the US did not have a province/state listed; instead they were labeled "America", which was recoded to NA. Many wines also did not have a price listed. Wines that were missing points, price, or province were excluded from the final mapping dataset.
Rounded rating to nearest whole number and price to 2 decimal places.
# Per-country summary: number of reviewed wines, mean rating (rounded to a
# whole number) and mean price (rounded to 2 decimals). "US" is spelled out
# as "United States" before the rows are grouped. Rows missing country,
# points or price are dropped.
# The trailing interactive `view` call was removed so that building the
# table has no viewer side effect.
wine_by_country <- wine_tidy_df %>%
  mutate(country = recode(country, US = "United States")) %>%
  select(country, points, price) %>%
  group_by(country) %>%
  drop_na() %>%
  summarise(
    total = n(),
    avg_rating = mean(points),
    avg_price = mean(price)
  ) %>%
  mutate(
    avg_rating = round(avg_rating, 0),
    avg_price = round(avg_price, 2)
  ) %>%
  arrange(desc(total))
## `summarise()` ungrouping output (override with `.groups` argument)
# Country-level columns with incomplete rows kept, used only to count how
# many values are missing in each column.
wine_country_missing <- wine_tidy_df %>%
  transmute(
    country = recode(country, US = "United States"),
    points,
    price
  )
# Count of NA values in each column.
map(wine_country_missing, function(column) sum(is.na(column)))
## $country
## [1] 63
##
## $points
## [1] 0
##
## $price
## [1] 8996
Missing values: * Country: 63 * Points: 0 * Price: 8996. Several wines did not list a country of origin or a price. Wines that were missing country or price were excluded from the final world mapping dataset.
# Fetch the state boundary layer with tigris::states(). The result is stored
# under the same name as the function, so a bare `states` below refers to
# the data. The tigris startup message suggests setting
# `options(tigris_use_cache = TRUE)` to cache the download between runs.
# NOTE(review): `cb = TRUE` presumably selects the generalized cartographic
# boundary file -- confirm against the tigris documentation.
states <- states(cb = TRUE)
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|===================== | 31%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|========================================== | 61%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|========================================================== | 84%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
# Quick look at the raw state polygons; clicking a polygon shows its NAME.
leaflet(states) %>%
  addTiles() %>%
  addPolygons(popup = ~NAME)
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
# Attach the per-state wine summary to the state polygons, matching the
# polygons' NAME column to wine_us$state.
# NOTE(review): states without a match appear to keep NA summary columns
# (rows with NA `total` are filtered out below) -- confirm geo_join's
# default join type.
states_merged_wine <- geo_join(states, wine_us, "NAME", "state")
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# Choropleth of US states coloured by `total`. `total` is a count of review
# rows per state (n() in the wine_us summary), so the popup and the legend
# are labelled "Wines reviewed" rather than "Wineries".

# Bin edges for the colour scale.
mybins <- c(0, 100, 1000, 8000, 10000, 40000)
mypal <- colorBin(
  palette = "Purples",
  domain = states_merged_wine$total,
  na.color = "transparent",
  bins = mybins
)

# Keep only the states that matched a row of the wine summary.
states_merged_wine <- subset(states_merged_wine, !is.na(total))

# One HTML popup string per remaining state, in row order.
popup <- paste0(
  states_merged_wine$NAME, "<br>",
  "Wines reviewed: ", states_merged_wine$total, "<br>",
  "Avg Rating: ", states_merged_wine$avg_rating, "<br>",
  "Avg Price: ", states_merged_wine$avg_price, "<br>"
)

state_map <- states_merged_wine %>%
  leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(-98.483330, 38.712046, zoom = 4) %>%
  addPolygons(
    fillColor = ~mypal(total),
    fillOpacity = 1.0,
    weight = 0.4,
    smoothFactor = 0.2,
    popup = ~popup
  ) %>%
  addLegend(
    pal = mypal,
    values = states_merged_wine$total,
    position = "bottomright",
    title = "Wines reviewed"
  )
## Warning: sf layer has inconsistent datum (+proj=longlat +datum=NAD83 +no_defs).
## Need '+proj=longlat +datum=WGS84'
# Print the state choropleth so the widget is rendered in the output.
state_map
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.5-18, (SVN revision 1082)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.1.1, released 2020/06/22
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/sf/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/proj
## Linking to sp version:1.4-4
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
# Read the world-borders shapefile layer from the project's data directory.
# file.path() builds the directory path with the platform separator and
# without a trailing slash; the original paste0() version hard-coded "/"
# and left a trailing "/" on the OGR data source name.
world_spdf <- readOGR(
  dsn = file.path(getwd(), "wine_data", "world_shape_file"),
  layer = "TM_WORLD_BORDERS_SIMPL-0.3",
  verbose = FALSE
)
# Quick look at the raw country polygons; clicking a polygon shows its NAME.
leaflet(world_spdf) %>%
  addTiles() %>%
  addPolygons(popup = ~NAME)
# World choropleth coloured by `total`. `total` is a count of review rows
# per country (n() in the wine_by_country summary), so the popup and the
# legend are labelled "Wines reviewed" rather than "Wineries".

# Attach the per-country summary to the country polygons, matching the
# polygons' NAME column to wine_by_country$country.
countries_merged_wine <- geo_join(
  world_spdf, wine_by_country, "NAME", "country"
)

# Bin edges for the colour scale.
world_bins <- c(0, 100, 1000, 10000, 20000, 30000, 60000)
world_pal <- colorBin(
  palette = "Reds",
  domain = countries_merged_wine$total,
  na.color = "transparent",
  bins = world_bins
)

# Keep only the countries that matched a row of the wine summary.
countries_merged_wine <- subset(countries_merged_wine, !is.na(total))

# One HTML popup string per remaining country, in row order.
world_popup <- paste0(
  countries_merged_wine$country, "<br>",
  "Wines reviewed: ", countries_merged_wine$total, "<br>",
  "Avg Rating: ", countries_merged_wine$avg_rating, "<br>",
  "Avg Price: ", countries_merged_wine$avg_price, "<br>"
)

world_map_nolab <- countries_merged_wine %>%
  leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lat = 10, lng = 0, zoom = 2) %>%
  addPolygons(
    fillColor = ~world_pal(total),
    fillOpacity = 1.0,
    weight = 0.4,
    smoothFactor = 0.2,
    popup = ~world_popup
  ) %>%
  addLegend(
    pal = world_pal,
    values = countries_merged_wine$total,
    position = "bottomright",
    title = "Wines reviewed"
  )

# Print the map so the widget is rendered in the output.
world_map_nolab
# World choropleth with place labels drawn on top of the filled polygons.
# Two map panes fix the stacking order: polygons (zIndex 410) sit below the
# label-only tile layer (zIndex 420).
#
# Changes from the original:
# * Layer options are built with providerTileOptions() / pathOptions()
#   instead of leafletOptions(). leafletOptions() is the map-level
#   constructor and adds a default `crs` entry that does not belong in
#   layer options.
# * The label-free base tiles now carry the group name that
#   addLayersControl() lists under `baseGroups`, so the control refers to
#   an existing layer.
world_map_labels <- countries_merged_wine %>%
  leaflet() %>%
  addMapPane(name = "polygons", zIndex = 410) %>%
  addMapPane(name = "maplabels", zIndex = 420) %>%
  addProviderTiles(
    "CartoDB.PositronNoLabels",
    group = "CartoDB.PositronNoLabels"
  ) %>%
  addProviderTiles(
    "CartoDB.PositronOnlyLabels",
    options = providerTileOptions(pane = "maplabels"),
    group = "map labels"
  ) %>%
  setView(lat = 10, lng = 0, zoom = 2) %>%
  addPolygons(
    fillColor = ~world_pal(total),
    fillOpacity = 1.0,
    group = "country",
    weight = 0.4,
    smoothFactor = 0.2,
    popup = ~world_popup,
    options = pathOptions(pane = "polygons")
  ) %>%
  addLayersControl(
    baseGroups = "CartoDB.PositronNoLabels",
    overlayGroups = c("map labels", "country")
  )

# Print the map so the widget is rendered in the output.
world_map_labels
Is a legend necessary?
For the world map, the aesthetics of the overlay look a little off. Adding labels may help (not sure how to deal with the overlap button, though).
Base map: dark or light? Base map options: http://leaflet-extras.github.io/leaflet-providers/preview/
# Look up the dark CartoDB base-map entry in leaflet's `providers` list, as
# a candidate alternative to the Positron tiles used in the maps above.
providers$CartoDB.DarkMatter
Code adapted from https://www.r-graph-gallery.com/183-choropleth-map-with-leaflet.html